*** this code exports the main over-time trend plots for ANES and GSS ***
*** LHS: PID ***
*** RHS: small biz dummy ***
*** unadjusted + adjusted by gender, race, education, income, region ***

*** session setup ***
* start from an empty session; newly generated variables default to double
clear all
set type double
* variable abbreviation is switched on for this session only; the earlier
* ", perm" form silently rewrote the user's saved Stata preferences
set varabbrev on

*** FILL IN MAIN PATH TO RUN CODE ***
global dropbox "..."

* project folders derived from the main path
global folder "$dropbox/_0_bjps_replication"
global in "$folder/in"
global out "$folder/out"

global graph "$out/graph"
global table "$out/table"
* intermediate files are written to Stata's temporary directory
global temp "`c(tmpdir)'"

* raise the variable limit while memory is still empty; 120000 is accepted
* only by Stata/MP, so fall back to the Stata/SE ceiling when it is refused
capture set maxvar 120000
if _rc set maxvar 32767

*** ANES ***
******************************************************************************
*** the 2020 wave comes as a separate file ***
import delimited using "$in/anes_timeseries_2020_csv_20220210.csv", varnames(1) clear

* self-employment dummy: 1 for v201529 codes 7 or 8, 0 for other positive
* codes, missing when v201529 is not positive
gen self_emp = inlist(v201529, 7, 8) if v201529 > 0

* party id rescaled from the 1-7 summary item to the unit interval
gen pid = (v201231x - 1) / 6 if v201231x > 0

* working dummy from v201534x; the self-employed are always counted as working
gen working = (v201534x == 1) if v201534x > 0
replace working = 1 if self_emp == 1 //CHECK

* age brackets 1-6 cover 18-24, 25-34, ..., 65-74; bracket 7 is 75 and above;
* code -9 is placed in category 0
gen age_group = .
replace age_group = 0 if v201507x == -9
local bracket = 0
foreach lo of numlist 18 25 35 45 55 65 {
	local ++bracket
	* the first bracket is seven years wide, all later ones are ten
	local hi = cond(`lo' == 18, 24, `lo' + 9)
	replace age_group = `bracket' if inrange(v201507x, `lo', `hi')
}
replace age_group = 7 if v201507x > 74

* detailed education: codes -9, -8 and 95 are pooled into category 0
gen edu_detail = v201510
recode edu_detail (-9 -8 95 = 0)

** edu group categorization is different from previous years **
* collapse detailed codes 3-5 into 3 and 6-8 into 4; all other codes are kept
gen edu = cond(inrange(edu_detail, 6, 8), 4, cond(inrange(edu_detail, 3, 5), 3, edu_detail))
* detailed income: positive codes of v201617x are kept, everything else is 0
gen income_group_detail = cond(v201617x > 0 & !mi(v201617x), v201617x, 0)

** income group categorization is different from previous years **
** previous years have 1 = 0-16 percentile, 2 = 17-33, 3 = 34-67, 4 = 68-95, 5 = 96-100 **
* weighted percentile cutoffs are computed on respondents with a valid income
* code and stored as a one-row file keyed by the constant z
preserve
	keep income_group_detail v200010a
	drop if income_group_detail == 0
	save "$temp/income only", replace

	collapse (p16) p_16 = income_group_detail ///
		(p33) p_33 = income_group_detail ///
		(p67) p_67 = income_group_detail ///
		(p95) p_95 = income_group_detail [pweight = v200010a]
	gen z = 1
	save "$temp/new_cutoff", replace
restore

* attach the four cutoffs to every respondent
gen z = 1
merge m:1 z using "$temp/new_cutoff", assert(3) nogen

* assign the five percentile bands; category 0 marks a missing income code
gen income_group = cond(income_group_detail == 0, 0, ///
	cond(income_group_detail <= p_16, 1, ///
	cond(income_group_detail <= p_33, 2, ///
	cond(income_group_detail <= p_67, 3, ///
	cond(income_group_detail <= p_95, 4, 5)))))

* race: positive codes of v201549x are kept, everything else becomes 0,
* then codes are reordered to line up with the numbering used in previous years
gen race = 0
replace race = v201549x if v201549x > 0 & !mi(v201549x)
* 4 -> 3 (asian), 5 -> 4 (native american), 3 -> 5 (hispanic)
recode race (3 = 5) (4 = 3) (5 = 4)

* gender: negative codes of v201600 are pooled into category 0
gen gender = cond(v201600 < 0, 0, v201600)

gen region = v203003

gen year = 2020

* keep the id, the constructed variables and the two 2020 weights
keep v200001 self_emp working pid age_group edu* income_group* race gender region year v200010a v200010b
save "$temp/2020", replace

*** read in raw data ***
import delimited using "$in/anes_timeseries_cdf_csv_20220916.csv", varnames(1) clear

* 2020 is taken from the separate file appended below, so it is removed here
gen year = vcf0004
keep if year != 2020

* convert every source column used below to numeric in a single pass
destring vcf9277 vcf0301 vcf0116 vcf0114 vcf0102 vcf0104 vcf0105a vcf0110 vcf0112, replace

* self-employment dummy: codes 2 and 3 of vcf9277; missing when the item is missing
gen self_emp = inlist(vcf9277, 2, 3) if !mi(vcf9277) //these are before 1986 or in 2002

* party id rescaled to the unit interval; code 0 and missing stay missing
gen pid = (vcf0301 - 1) / 6 if vcf0301 != 0 & !mi(vcf0301)

* working dummy from vcf0116 (missing values for pre 1970); the self-employed
* always count as working
gen working = (vcf0116 == 1) | (self_emp == 1) //CHECK

**********
* income group, with missing coded 0 and flagged separately
gen income_group = cond(mi(vcf0114), 0, vcf0114)
gen income_group_missing = mi(vcf0114)

gen age_group = vcf0102 //no missing

gen gender = vcf0104

* race, with missing placed in category 9
gen race = cond(mi(vcf0105a), 9, vcf0105a)

gen edu = vcf0110 //no missing

* region, with missing placed in category 5
gen region = cond(mi(vcf0112), 5, vcf0112)

* stack the 2020 wave; its rows have no vcf0009z, so they take v200010a as weight
append using "$temp/2020"
gen weight = cond(mi(vcf0009z), v200010a, vcf0009z)

save "$temp/base", replace
		
***************************************************
*** plots ***

*** unadjusted ***
* Regresses pid on self_emp separately for each ANES wave (weighted, robust
* SEs), collects the coefficient and its 95% CI, and plots them over time.
u "$temp/base", clear

* one pass over all waves; previously two loops with identical bodies
foreach yr of numlist 1986(2)2000 2004(4)2020 {
	reg pid self_emp [pweight = weight] if year == `yr', vce(robust)
	g beta_`yr' = _b[self_emp]
	g se_`yr' = _se[self_emp]

	* two-sided 95% critical value from the regression's residual df
	local t = invttail(e(df_r), 0.025)
	g upper_`yr' = beta_`yr' + `t'*se_`yr'
	g lower_`yr' = beta_`yr' - `t'*se_`yr'
}

* the stored results are constant across rows, so this leaves a single row
keep beta* se_* lower* upper*
duplicates drop

* wide (one column per year) to long (one row per year)
g z = 1
reshape long beta_ se_ upper_ lower_, i(z) j(year)

* variables are referenced by their full post-reshape names so the plot does
* not depend on variable abbreviation; "gray" replaces "grey", which is not a
* Stata colour name
twoway rcap upper_ lower_ year, lstyle(ci) || line beta_ year, mstyle(p1) ///
	name(rcap, replace) ylabel(, nogrid) xscale(range(1984(2)2020)) xlab(1984(4)2020) ///
	yline(0, lc(gray)) legend(off) graphregion(color(white)) ///
	ytitle("7-Point Party ID (1 = Strong Republican)") xtitle(" ")
graph export "$graph/anes_unadjusted.png", replace


****************
*** adjusted ***
* Same per-wave regression as the unadjusted plot, but absorbing income, age,
* gender, race, education and region categories with reghdfe.
u "$temp/base", clear

* for 2020 the detailed income and education codings are used as controls
replace income_group = income_group_detail if year == 2020
replace edu = edu_detail if year == 2020

* one pass over all waves; previously two loops with identical bodies
foreach yr of numlist 1986(2)2000 2004(4)2020 {
	reghdfe pid self_emp [pweight = weight] if year == `yr', absorb(i.income_group i.age_group i.gender i.race i.edu i.region) vce(robust)
	g beta_`yr' = _b[self_emp]
	g se_`yr' = _se[self_emp]

	* two-sided 95% critical value from the regression's residual df
	local t = invttail(e(df_r), 0.025)
	g upper_`yr' = beta_`yr' + `t'*se_`yr'
	g lower_`yr' = beta_`yr' - `t'*se_`yr'
}

* the stored results are constant across rows, so this leaves a single row
keep beta* se_* lower* upper*
duplicates drop

* wide (one column per year) to long (one row per year)
g z = 1
reshape long beta_ se_ upper_ lower_, i(z) j(year)

* full variable names avoid relying on variable abbreviation; "gray" replaces
* "grey", which is not a Stata colour name
twoway rcap upper_ lower_ year, lstyle(ci) || line beta_ year, mstyle(p1) ///
	name(rcap, replace) ylabel(, nogrid) xscale(range(1984(2)2020)) xlab(1984(4)2020) ///
	yline(0, lc(gray)) legend(off) graphregion(color(white)) ///
	ytitle("7-Point Party ID (1 = Strong Republican)") xtitle(" ")
graph export "$graph/anes_adjusted.png", replace

*** multi period avgs: 4 years ***
*** unadjusted ***
use "$temp/base", clear

* trim to the variables the bracket regressions need
keep year pid self_emp weight income_group age_group gender race edu region
save "$temp/relevant var", replace

*** pooled windows: bracket t holds the waves from t-4 through t, i.e. 1990 pools 1986-1990 ***
local brackets 1990(2)2000 2004(4)2020
foreach b of numlist `brackets' {
	use "$temp/relevant var", clear
	keep if inrange(year, `b' - 4, `b')
	gen year_bracket = `b'
	save "$temp/`b'", replace
}

* stack the bracket files; a wave appears once for every bracket that covers it
clear
foreach b of numlist `brackets' {
	append using "$temp/`b'"
}
save "$temp/enlarged_base", replace

*** 7-point party scale: 1 = strong R ***
* Runs on the stacked bracket data currently in memory: one weighted
* regression of pid on self_emp per year_bracket, then a coefficient plot.
* one pass over all brackets; previously two loops with identical bodies
foreach b of numlist 1990(2)2000 2004(4)2020 {
	reg pid self_emp [pweight = weight] if year_bracket == `b', vce(robust)
	g beta_`b' = _b[self_emp]
	g se_`b' = _se[self_emp]

	* two-sided 95% critical value from the regression's residual df
	local t = invttail(e(df_r), 0.025)
	g upper_`b' = beta_`b' + `t'*se_`b'
	g lower_`b' = beta_`b' - `t'*se_`b'
}

* the stored results are constant across rows, so this leaves a single row
keep beta* se_* lower* upper*
duplicates drop

* wide (one column per bracket) to long (one row per bracket)
g z = 1
reshape long beta_ se_ upper_ lower_, i(z) j(year)

* full variable names avoid relying on variable abbreviation; "gray" replaces
* "grey", which is not a Stata colour name
twoway rcap upper_ lower_ year, lstyle(ci) || line beta_ year, mstyle(p1) ///
	name(rcap, replace) ylabel(, nogrid) xscale(range(1990(2)2020)) xlab(1988(4)2020) ///
	yline(0, lc(gray)) legend(off) graphregion(color(white)) ///
	ytitle("Difference in Republican Identity (Unadjusted)") xtitle(" ")
graph export "$graph/anes_4yr_bracket_unadjusted.png", replace

*********************************************************************************
*** adjusted ***
* Bracket-level regressions of pid on self_emp absorbing income, age, gender,
* race, education and region categories, followed by a coefficient plot.
u "$temp/enlarged_base", clear

*** 7-point party scale: 1 = strong R ***
* one pass over all brackets; previously two loops with identical bodies
foreach b of numlist 1990(2)2000 2004(4)2020 {
	reghdfe pid self_emp [pweight = weight] if year_bracket == `b', absorb(i.income_group i.age_group i.gender i.race i.edu i.region) vce(robust)
	g beta_`b' = _b[self_emp]
	g se_`b' = _se[self_emp]

	* two-sided 95% critical value from the regression's residual df
	local t = invttail(e(df_r), 0.025)
	g upper_`b' = beta_`b' + `t'*se_`b'
	g lower_`b' = beta_`b' - `t'*se_`b'
}

* the stored results are constant across rows, so this leaves a single row
keep beta* se_* lower* upper*
duplicates drop

* wide (one column per bracket) to long (one row per bracket)
g z = 1
reshape long beta_ se_ upper_ lower_, i(z) j(year)

* full variable names avoid relying on variable abbreviation; "gray" replaces
* "grey", which is not a Stata colour name
twoway rcap upper_ lower_ year, lstyle(ci) || line beta_ year, mstyle(p1) ///
	name(rcap, replace) ylabel(, nogrid) xscale(range(1990(2)2020)) xlab(1988(4)2020) ///
	yline(0, lc(gray)) legend(off) graphregion(color(white)) ///
	ytitle("Difference in Republican Identity (Adjusted)") xtitle(" ")
graph export "$graph/anes_4yr_bracket_adjusted.png", replace

*****************************************************************************

*** GSS ***
*****************************************************************************
u "$in/gss7222_r3.dta", clear

* self-employment dummy: 1 if wrkslf == 1, 0 if wrkslf == 2, missing otherwise
gen self_emp = wrkslf==1 if inlist(wrkslf,1,2)

// pid continuous: partyid code 7 is folded into code 3, then divided by 6
replace partyid = 3 if partyid == 7
gen pid = partyid/6 if partyid!=.n

// coding up controls
gen agenew = age

* age brackets 1-6 cover 18-24, 25-34, ..., 65-74
g age_group = 1 if inrange(age, 18,24)
replace age_group = 2 if inrange(age, 25, 34)
replace age_group = 3 if inrange(age, 35, 44)
replace age_group = 4 if inrange(age, 45, 54)
replace age_group = 5 if inrange(age, 55, 64)
replace age_group = 6 if inrange(age, 65, 74)
* Stata sorts missing values above every number, so the open-ended top
* bracket has to exclude them explicitly or missing ages count as 75+
replace age_group = 7 if age > 74 & !mi(age)
* missing ages form category 0; the earlier condition joined the four missing
* codes with &, which no single value can satisfy, so it never fired
replace age_group = 0 if inlist(agenew, .d, .i, .n, .s)

* education: degree shifted up by one so that 0 is free for the missing category
gen edu = degree + 1
replace edu = 0 if inlist(degree, .d,.n,.s)

* weight is set to 1 for all years before 1988
replace wtssps = 1 if year < 1988

* income: walk through the income recodes in the listed order and take the
* first one that is not one of the listed missing codes
gen income_group = .
foreach v of varlist income16 income06 income98 income91 income86 income82 income77 income72 income {
	replace income_group = `v' if mi(income_group) & !inlist(`v', .d, .i, .n, .s, .r)
}

* weighted quartile cutoffs of income_group within each survey year
preserve
	keep income_group year wtssps
	collapse (p25) income_p25 = income_group ///
		(p50) income_p50 = income_group ///
		(p75) income_p75 = income_group [pweight = wtssps], by(year)
	save "$temp/income_q", replace
restore

merge m:1 year using "$temp/income_q", nogen

* within-year income quartile 1-4; category 0 marks a missing income
gen income_quantile = cond(mi(income_group), 0, ///
	cond(income_group <= income_p25, 1, ///
	cond(income_group <= income_p50, 2, ///
	cond(income_group <= income_p75, 3, 4))))

* race: the .i missing code becomes category 0
replace race = 0 if race == .i

* gender: the listed missing codes of sex become category 0
gen gender = cond(inlist(sex, .i, .d, .n, .s), 0, sex)

save "$temp/base", replace

******************************************************************************
*** plots ***
*** unadjusted ***
* Regresses pid on self_emp separately for every year from 1972 to 2022
* (weighted, robust SEs) and plots the coefficient with its 95% CI.
u "$temp/base", clear
forval yr = 1972/2022 {
	* a year with no usable observations makes the regression fail; skip it
	* explicitly so that results left in e() by an earlier successful fit
	* are never stored under this year
	capture reg pid self_emp [pweight = wtssps] if year == `yr', vce(robust)
	if _rc {
		display as text "year `yr': regression not estimated (rc = " _rc "), skipped"
		continue
	}

	g beta_`yr' = _b[self_emp]
	g se_`yr' = _se[self_emp]

	* two-sided 95% critical value from the regression's residual df
	local t = invttail(e(df_r), 0.025)
	g upper_`yr' = beta_`yr' + `t'*se_`yr'
	g lower_`yr' = beta_`yr' - `t'*se_`yr'
}

* the stored results are constant across rows, so this leaves a single row
keep beta* se_* lower* upper*
duplicates drop

* wide (one column per year) to long (one row per year)
g z = 1
reshape long beta_ se_ upper_ lower_, i(z) j(year)

* full variable names avoid relying on variable abbreviation; "gray" replaces
* "grey", which is not a Stata colour name
twoway rcap upper_ lower_ year, lstyle(ci) || line beta_ year, mstyle(p1) ///
	name(rcap, replace) ylabel(, nogrid) xscale(range(1972(2)2022)) xlab(1972(4)2022) ///
	yline(0, lc(gray)) legend(off) graphregion(color(white)) ///
	ytitle("Difference in Republican Identity (Unadjusted)") xtitle(" ")
graph export "$graph/gss_unadjusted.png", replace


****************************
*** adjusted ***
* Per-year regressions of pid on self_emp absorbing income, age, gender, race,
* education and region categories, followed by a coefficient plot.
* NOTE(review): this absorbs income_group (raw income codes, missing rows are
* dropped) while the 4-year bracket version absorbs income_quantile - confirm
* the difference is intended.
u "$temp/base", clear
forval yr = 1972/2022 {
	* skip years where the model cannot be estimated so that results left in
	* e() by an earlier successful fit are never stored under this year
	capture reghdfe pid self_emp [pweight = wtssps] if year == `yr', absorb(i.income_group i.age_group i.gender i.race i.edu i.region) vce(robust)
	if _rc {
		display as text "year `yr': regression not estimated (rc = " _rc "), skipped"
		continue
	}

	g beta_`yr' = _b[self_emp]
	g se_`yr' = _se[self_emp]

	* two-sided 95% critical value from the regression's residual df
	local t = invttail(e(df_r), 0.025)
	g upper_`yr' = beta_`yr' + `t'*se_`yr'
	g lower_`yr' = beta_`yr' - `t'*se_`yr'
}

* the stored results are constant across rows, so this leaves a single row
keep beta* se_* lower* upper*
duplicates drop

* wide (one column per year) to long (one row per year)
g z = 1
reshape long beta_ se_ upper_ lower_, i(z) j(year)

* full variable names avoid relying on variable abbreviation; "gray" replaces
* "grey", which is not a Stata colour name
twoway rcap upper_ lower_ year, lstyle(ci) || line beta_ year, mstyle(p1) ///
	name(rcap, replace) ylabel(, nogrid) xscale(range(1972(2)2022)) xlab(1972(4)2022) ///
	yline(0, lc(gray)) legend(off) graphregion(color(white)) ///
	ytitle("Difference in Republican Identity (Adjusted)") xtitle(" ")
graph export "$graph/gss_adjusted.png", replace

*********************************************************************************
*** multi period avgs: 4 years ***

*** unadjusted ***
use "$temp/base", clear

* trim to the variables the bracket regressions need
keep year pid self_emp wtssps income_quantile age_group gender race edu region id
save "$temp/relevant var", replace

*** pooled windows: bracket t holds the years from t-4 through t, i.e. 1990 pools 1986-1990 ***
forvalues b = 1976(2)2022 {
	use "$temp/relevant var", clear
	keep if inrange(year, `b' - 4, `b')
	gen year_bracket = `b'
	save "$temp/`b'", replace
}

* stack the bracket files; a year appears once for every bracket that covers it
clear
forvalues b = 1976(2)2022 {
	append using "$temp/`b'"
}

save "$temp/enlarged_base", replace

* Runs on the stacked bracket data currently in memory: one weighted
* regression of pid on self_emp per year_bracket, then a coefficient plot.
forval b = 1976(2)2022 {
	* skip brackets where the regression cannot be estimated so that results
	* left in e() by an earlier successful fit are never stored under them
	capture reg pid self_emp [pweight = wtssps] if year_bracket == `b', vce(robust)
	if _rc {
		display as text "bracket `b': regression not estimated (rc = " _rc "), skipped"
		continue
	}

	g beta_`b' = _b[self_emp]
	g se_`b' = _se[self_emp]

	* two-sided 95% critical value from the regression's residual df
	local t = invttail(e(df_r), 0.025)
	g upper_`b' = beta_`b' + `t'*se_`b'
	g lower_`b' = beta_`b' - `t'*se_`b'
}

* the stored results are constant across rows, so this leaves a single row
keep beta* se_* lower* upper*
duplicates drop

* wide (one column per bracket) to long (one row per bracket)
g z = 1
reshape long beta_ se_ upper_ lower_, i(z) j(year)

* full variable names avoid relying on variable abbreviation; "gray" replaces
* "grey", which is not a Stata colour name
twoway rcap upper_ lower_ year, lstyle(ci) || line beta_ year, mstyle(p1) ///
	name(rcap, replace) ylabel(, nogrid) xscale(range(1976(2)2022)) xlab(1976(4)2022) ///
	yline(0, lc(gray)) legend(off) graphregion(color(white)) ///
	ytitle("Difference in Republican Identity (Unadjusted)") xtitle(" ")
graph export "$graph/gss_4yr_bracket_unadjusted.png", replace


************************
*** adjusted ***
* Bracket-level regressions of pid on self_emp absorbing income quartile, age,
* gender, race, education and region categories, then a coefficient plot.
u "$temp/enlarged_base", clear

* brackets exist only on the 1976(2)2022 grid, so the loop steps by two like
* the unadjusted version instead of visiting every calendar year
forval b = 1976(2)2022 {
	* skip brackets where the model cannot be estimated so that results left
	* in e() by an earlier successful fit are never stored under them
	capture reghdfe pid self_emp [pweight = wtssps] if year_bracket == `b', absorb(i.income_quantile i.age_group i.gender i.race i.edu i.region) vce(robust)
	if _rc {
		display as text "bracket `b': regression not estimated (rc = " _rc "), skipped"
		continue
	}

	g beta_`b' = _b[self_emp]
	g se_`b' = _se[self_emp]

	* two-sided 95% critical value from the regression's residual df
	local t = invttail(e(df_r), 0.025)
	g upper_`b' = beta_`b' + `t'*se_`b'
	g lower_`b' = beta_`b' - `t'*se_`b'
}

* the stored results are constant across rows, so this leaves a single row
keep beta* se_* lower* upper*
duplicates drop

* wide (one column per bracket) to long (one row per bracket)
g z = 1
reshape long beta_ se_ upper_ lower_, i(z) j(year)

* full variable names avoid relying on variable abbreviation; "gray" replaces
* "grey", which is not a Stata colour name
twoway rcap upper_ lower_ year, lstyle(ci) || line beta_ year, mstyle(p1) ///
	name(rcap, replace) ylabel(, nogrid) xscale(range(1976(2)2022)) xlab(1976(4)2022) ///
	yline(0, lc(gray)) legend(off) graphregion(color(white)) ///
	ytitle("Difference in Republican Identity (Adjusted)") xtitle(" ")
graph export "$graph/gss_4yr_bracket_adjusted.png", replace
